function [Record_fitness, theta_opt] = fit_EM_GPR(y, x, p, theta_initial, params, is_disp)
% FIT_EM_GPR  Fit the kernel parameter mu with an EM-style gradient ascent.
%
%   [Record_fitness, theta_opt] = fit_EM_GPR(y, x, p, theta_initial, params)
%   [Record_fitness, theta_opt] = fit_EM_GPR(..., is_disp)
%
%   Inputs
%     y              Observation vector; used as an N-by-1 column (y'*(Ky\y)).
%     x, p           Passed unchanged to get_K and forward (their meaning is
%                    defined by those functions, not here).
%     theta_initial  Struct; only the field v is used. mu is always
%                    re-initialised by the grid search below and rho is
%                    initialised from the power of y.
%     params         Struct with fields c, k0 and sigma2_n.
%     is_disp        Optional (default false). When true, per-iteration
%                    progress is printed, the inner-loop objective is
%                    plotted, and the mu trajectory is saved to 'mus.mat'.
%
%   Outputs
%     Record_fitness N_iter-by-3 matrix; row k holds [ell, A, B] after outer
%                    iteration k, where A = -real(y'*inv(Ky)*y),
%                    B = -log|det(Ky)| and ell = A + B - ||mu||^2/sigma2_mu.
%     theta_opt      Struct with fields rho, v, mu (final parameters).
%
%   Requires get_K and forward on the MATLAB path.

    if ~exist('is_disp', 'var')
        is_disp = false;
    end

    c           = params.c;
    k0          = params.k0;
    sigma2_n    = params.sigma2_n;

    N   = length(y);
    I_N = eye(N);                                       % Reused in every solve below.

    v = theta_initial.v;
    % NOTE: theta_initial.mu is intentionally not read; mu is chosen by the
    % grid search below.

    rho = vecnorm(y)^2/N/(1+sigma2_n);                  % Initial channel power.

    % Iteration budget and fixed gradient-ascent step on mu.
    N_iter          = 20;
    N_inner         = 200;
    step_mu         = 3;
    Record_fitness  = zeros(N_iter, 3);

    % ---- Initialisation of mu by grid search -----------------------------
    mu_R      = 10;             % Norm of every candidate mu.
    sigma2_mu = 3000;           % Scale of the quadratic penalty ||mu||^2/sigma2_mu.
    Nig       = 10;             % Number of initial guesses.
    mu_directions = asin(linspace(-1+1/Nig, 1-1/Nig, Nig));
    Clip      = 30;             % Forwarded to forward().

    % Score each candidate with the third output of forward() and keep the
    % candidate with the largest score.
    ell_ig = zeros(Nig, 1);
    for idx_guess = 1:Nig
        mu = mu_R * [sin(mu_directions(idx_guess)); 0; -cos(mu_directions(idx_guess))];

        theta = struct('rho', rho, 'v', v, 'mu', mu);
        [~, ~, ell_ig(idx_guess), ~] = forward(x, p, theta, k0, c, sigma2_n, N, y, Clip);
    end

    [~, idx_M] = max(ell_ig);
    mu = mu_R * [sin(mu_directions(idx_M)); 0; -cos(mu_directions(idx_M))];

    theta = struct('rho', rho, 'v', v, 'mu', mu);

    mu_arr = zeros(3, N_iter);  % mu after each outer iteration.

    for iter = 1:N_iter

        Qs = zeros(N_inner, 1); % Objective value at each inner step.

        for iter_inner = 1:N_inner
            % Posterior of h given y under the current theta.
            [Kh, grads] = get_K(x, p, theta, k0, c);
            Kh_inv = inv(Kh);   % Explicit inverse is needed for the gradient below.
            Ky     = Kh + sigma2_n*I_N;
            mean_h = Kh*(Ky\y);
            % Equivalent to inv(inv(Kh) + I/sigma2_n) (matrix inversion lemma)
            % but avoids inverting Kh a second time.
            cov_h  = Kh - Kh*(Ky\Kh);

            % Target function: Q = E{-h'*inv(Kh)*h} - logdet(Kh) - ||mu||^2/sigma2_mu,
            % with E{h*h'} = cov_h + mean_h*mean_h'.
            A_h = cov_h + mean_h*mean_h';

            % log|det(Kh)| from the LU factor; log(det(Kh)) over/underflows
            % once N is moderately large.
            [~, U_h]  = lu(Kh);
            logdet_Kh = sum(log(abs(diag(U_h))));

            Qs(iter_inner) = -logdet_Kh - real(trace(Kh_inv*A_h)) - norm(mu)^2/sigma2_mu;

            % Gradient of Q with respect to mu, chained through Kh.
            d_Q_Kh   = -conj(Kh_inv) + (Kh_inv*A_h*Kh_inv).';
            gmus     = [grads.mu];
            d_ell_mu = 2*real(gmus*d_Q_Kh(:));

            % Add the gradient of the penalty and take a fixed-size step.
            d  = d_ell_mu - 2*mu/sigma2_mu;
            mu = mu + step_mu*d;
            theta = struct('rho', rho, 'v', v, 'mu', mu);
        end

        if is_disp
            plot(Qs);
        end

        % Marginal log-likelihood at the UPDATED mu. Kh left over from the
        % inner loop belongs to the previous mu, so re-evaluate the kernel.
        Kh = get_K(x, p, theta, k0, c);
        Ky = Kh + sigma2_n*I_N;                         % Noisy kernel.
        Ky_inv_y = Ky\y;

        [~, U_y] = lu(Ky);
        A   = real(-y'*Ky_inv_y);
        B   = -sum(log(abs(diag(U_y))));                % = -log|det(Ky)|
        ell = A + B - norm(mu)^2/sigma2_mu;

        Record_fitness(iter, 1) = ell;
        Record_fitness(iter, 2) = A;
        Record_fitness(iter, 3) = B;

        % rho is kept at its initial value; no rho update is performed.

        if is_disp
            fprintf('%d: fitness = %f, mu = [%.4f, %.4f, %.4f], rho = %.4f\n',...
                iter, ell, mu(1), mu(2), mu(3), rho);
        end

        mu_arr(:, iter) = mu;
    end

    theta_opt = struct('rho', rho, 'v', v, 'mu', mu);

    if is_disp
        save('mus.mat', 'mu_arr');
        fprintf('File saved.\n');
    end

end

